*********************************************************************************
*** For Journal of the Association of Environmental and Resource Economists
*** This do-file produces the final tables in the paper
***********************************************************************************


* Reserve a temporary file name; it is not referenced in this part of the script.
tempfile leastcostdist 

set more off 

** Open data ** 

global dir D:/Dropbox/Infant_and_Maternal_Health 
cd  "$dir/Results"
use "$dir/datafiles/IMR_workfile.dta", clear  


* Merge on the 2010-census code, keeping the original code so it can be restored afterwards.
gen original_code=code 
replace code=census2010_code 
recode code (419001=411801)    // Jiyuan, a city directly administered by Henan province (for match below)
* Forward slashes are used throughout so the path works on every platform
* and a backslash can never end up escaping a macro character.
merge m:1 code using "$dir/datafiles/county_waterpoints_leastcostvalue"      // coarse least distance calculation, for cross-checking, not estimation 
list code jcdmc if _merge==1 
keep if _merge ==3  
drop _merge  

* Restore the original location code.
drop code  
ren original_code code  

drop Name jcdmc type name province city county pyname post_cod


** Clean data for regressions **
* Keep the 2009-2011 window only.
keep if inrange(year, 2009, 2011)

* Report the number of distinct locations with a non-missing water-quality reading.
unique code if waterquality~=.

* Put code first, then average every variable from year through Cost within code.
order code
collapse (mean) year-Cost, by(code)

*** Least cost distance (calculated at PKU)
gen l_cost_dist = log(1+cost_dem_river_1)

* log(1+x) of infant and neonatal mortality rates, overall and by sex
foreach m of varlist imr imr_male imr_female nmr nmr_male nmr_female {
	gen l_`m' = log(1+`m')
}

* Post-neonatal mortality = infant mortality minus neonatal mortality
gen post_nmr = imr-nmr
foreach s in male female {
	gen post_nmr_`s' = imr_`s'-nmr_`s'
}

foreach p of varlist post_nmr post_nmr_male post_nmr_female {
	gen l_`p' = log(1+`p')
}

* log(1+x) of cause-specific infant mortality rates (no rescaling is applied here)
foreach c of varlist mr_d_infant_pneum mr_d_infant_loww mr_d_infant_other {
	gen l_`c' = log(1+`c')
}

* Province code = location code with its last four digits removed
gen prov_code = int(code/10000)

*** Group provinces into several regions
gen region = .
replace region = 1 if inlist(prov_code, 21, 22, 23)
replace region = 2 if inrange(prov_code, 11, 15)
replace region = 3 if inrange(prov_code, 31, 37)
replace region = 4 if inrange(prov_code, 41, 46)
replace region = 5 if inrange(prov_code, 50, 54)
replace region = 6 if prov_code >= 61

* Codes 12 and 31 each have a single observation, which causes singularity problems
* with province fixed effects; they are folded into a neighbouring province.
* Dropping them does not affect the results.
recode prov_code (12=11) (31=32)


* Generate Control Variables *
gen l_pcincome = log(pcincome+1)

gen l_gdp=log(gdp)   


*** rural definition (2010 census: rural population was 674,149,546, accounting for 50%)
*** http://www.stats.gov.cn/tjsj/tjgb/rkpcgb/qgrkpcgb/201104/t20110428_30327.html 

* Stata treats a missing value as larger than any number, so an unguarded
* "share > 0.50" would code locations with a missing rural share as rural (1).
* The guard leaves rural2 missing in that case instead.
gen rural2 = ruralpop/totpop>0.50 if !missing(ruralpop/totpop)

*** indicator for either mountain or minority areas  
gen mountain= Mountain==1 | mountains==1   

* For each fiscal / health-resource count: treat zeros as missing, convert to a
* rate per 100,000 population, and take logs.
foreach x of var fisexp fisexp_h fisexp_inf inst_inf staff_inf tech_inf towns hospital hos_staff hos_full hos_part villiage health_room vill_doc vill_fedoc vill_hstaff vill_fhstaff midwife {
	replace `x' = . if `x' ==0 
	gen `x'_pc = 100000*`x'/population        
	gen l_`x'_pc = log(`x'_pc)        
}

* Multiply tap-water coverage by 10, so coefficients read "per 10 percent change in coverage"
replace sharetapwater = 10*sharetapwater       // range 1~10

* Water pollution groups: split waterquality at the median; the upper half is flagged
xtile waterquality_g2 = waterquality, n(2)
gen lowwaterquality = (waterquality_g2==2) if waterquality != .

* Control sets used across the tables
local regressor1 "l_pcincome "   
local regressor2 "l_pcincome lowwaterquality"   
local regressor3 "l_pcincome sharemanu rural2 mountain"   
local regressor4 "l_pcincome l_fisexp_pc l_fisexp_h_pc "   
local regressor5 "l_pcincome l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc "   
local regressors "l_pcincome lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2 mountain "   

*** Full control set without the mountain indicator
local nomountain "l_pcincome lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2"   

* Listwise deletion: keep only locations with the instrument and every full-set control observed
foreach v in l_cost_dist `regressors' {
	drop if missing(`v')
}

** Xizang has a single observation; it is merged with Sichuan (neighbouring province)
** to avoid a singularity problem. Dropping that location does not affect the results.
recode prov_code (54=51)

* Write the list of location codes, with sample = 1 for codes present both in the
* estimation data and in imr_sites; the data in memory are restored afterwards.
preserve
	keep code
	duplicates drop code, force
	merge 1:1 code using $dir/GIS/imr_sites
	gen sample = (_merge==3)
	save SampleList, replace
restore


********** Graphs **********

* Scatter of IMR against the log least-cost distance, with a totpop-weighted linear fit.
* The y-axis title previously read "Mortaltiy"; the typo is corrected here.
quietly: tw scatter imr l_cost_dist , msize(large) mcolor(dknavy )  msymbol(Oh)  || lfit imr l_cost_dist [w=totpop], lwidth(thick) ||, ///
graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255))  ylabel(,angle(0)) ///  
legend(off) xtitle("Least Cost Distance (log)") ytitle("Infant Mortality Rate (Deaths per 1,000 Live Births)")  

graph export imr_cost.emf, replace  

* sharetapwater was multiplied by 10 earlier; divide by 10 again for plotting.
gen sh_w=sharetapwater/10 
quietly: tw scatter imr sh_w , msize(large) mcolor(dknavy )  msymbol(Oh)  || lfit imr sh_w [w=totpop], lwidth(thick) ||, ///
graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255))  ylabel(,angle(0))  ///  
legend(off) xtitle("Share of Piped Water") ytitle("Infant Mortality Rate (Deaths per 1,000 Live Births)")  

graph export imr_tapwater.emf, replace  

* Log least-cost distance against the piped-water share.
quietly: tw scatter l_cost_dist sh_w , msize(large) mcolor(dknavy )  msymbol(Oh)  || lfit l_cost_dist sh_w [w=totpop], lwidth(thick) ||, ///
graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255))  ylabel(,angle(0))  ///  
legend(off) xtitle("Share of Piped Water") ytitle("Least Cost Distance (log)")  

graph export cost_tapwater.emf, replace  


* Estimation sample reloaded by every table section below.
save sample.dta, replace   




********** Tables **********


*** Table 2. The Association between Piped Water Coverage and the Least-cost Distance ***
*** Table 4. The Effect of Piped Water Coverage on Infant Mortality Rate ***

use sample.dta, clear

cap erase T_IV.xml
cap erase T_IV.txt

*** IV results
* Seven specifications per outcome, in column order. "none" means no extra controls;
* every other entry names one of the control-set locals defined earlier in this file.
* NOTE(review): the regressor2 and regressor3 columns both carry the title ctrl_demo.
local iv_sets   "none regressor1 regressor2 regressor3 regressor4 regressor5 regressors"
local iv_titles "no_ctrl ctrl_inc ctrl_demo ctrl_demo ctrl_soec ctrl_soec_med ctrl_all"
local iv_n : word count `iv_sets'

foreach y of var l_imr imr {
	forvalues k = 1/`iv_n' {
		local cset   : word `k' of `iv_sets'
		local ctitle : word `k' of `iv_titles'
		local ctrls ""
		if "`cset'" != "none" local ctrls "``cset''"

		* sharetapwater instrumented by l_cost_dist, province dummies, totpop weights
		quietly xi: ivreg2 `y' `ctrls' (sharetapwater=l_cost_dist) i.prov_code [w=totpop], robust
		outreg2 using T_IV, excel keep(sharetapwater) e(rmse) ctitle(`ctitle') dec(3) adec(3) symbol(**,*) append
		weakivtest
	}
}



cap erase T_first.xml
cap erase T_first.txt

*** IV first stage results
* Regress sharetapwater on l_cost_dist for each control set, in column order.
* "none" means no extra controls; the other entries name control-set locals defined earlier.
local fs_sets   "none regressor1 regressor2 regressor3 regressor4 regressor5 regressors"
local fs_titles "no_ctrl ctrl_inc ctrl_demo ctrl_demo ctrl_soec ctrl_soec_med ctrl_all"
local fs_n : word count `fs_sets'

forvalues k = 1/`fs_n' {
	local cset   : word `k' of `fs_sets'
	local ctitle : word `k' of `fs_titles'
	local ctrls ""
	if "`cset'" != "none" local ctrls "``cset''"

	quietly xi: reg sharetapwater l_cost_dist `ctrls' i.prov_code [w=totpop], robust
	outreg2 using T_first, excel keep(l_cost_dist) e(F) adec(3) ctitle(`ctitle') symbol(**,*) dec(3) append
}



*** Appendix Table A12. The Effect of Piped Water Coverage on Infant Mortality Rate Controlling for Air Pollutants ***

cap erase T_IV_air.xml
cap erase T_IV_air.txt

local airpollutants "pm10 so2"  

*** IV results
* Same seven specifications as the main IV table, each with pm10 and so2 added as controls.
local air_sets   "none regressor1 regressor2 regressor3 regressor4 regressor5 regressors"
local air_titles "no_ctrl ctrl_inc ctrl_demo ctrl_demo ctrl_soec ctrl_soec_med ctrl_all"
local air_n : word count `air_sets'

foreach y of var l_imr imr {
	forvalues k = 1/`air_n' {
		local cset   : word `k' of `air_sets'
		local ctitle : word `k' of `air_titles'
		local ctrls ""
		if "`cset'" != "none" local ctrls "``cset''"

		quietly xi: ivreg2 `y' `ctrls' `airpollutants' (sharetapwater=l_cost_dist) i.prov_code [w=totpop], robust
		outreg2 using T_IV_air, excel keep(sharetapwater) e(rmse) ctitle(`ctitle') dec(3) adec(3) symbol(**,*) append
	}
}



***** Table 4, column 8: OLS Results *****

cap erase T_ols.xml
cap erase T_ols.txt

* OLS counterpart of the IV table: the outcome regressed directly on sharetapwater
* for each control set, in column order.
local ols_sets   "none regressor1 regressor2 regressor3 regressor4 regressor5 regressors"
local ols_titles "no_ctrl ctrl_inc ctrl_demo ctrl_demo ctrl_soec ctrl_soec_med ctrl_all"
local ols_n : word count `ols_sets'

foreach y of var l_imr imr {
	forvalues k = 1/`ols_n' {
		local cset   : word `k' of `ols_sets'
		local ctitle : word `k' of `ols_titles'
		local ctrls ""
		if "`cset'" != "none" local ctrls "``cset''"

		quietly xi: reg `y' sharetapwater `ctrls' i.prov_code [w=totpop], robust
		outreg2 using T_ols, excel keep(sharetapwater) e(rmse) adec(3) ctitle(`ctitle') symbol(**,*) dec(3) append
	}
}




*** Table 3. The Association between the Least-cost Distance and Control Variables *** 
*** Appendix Table A1. The Association Between Least-cost Distance and Local Economic/Health Conditions ***

cap erase T_IV_validity.xml 
cap erase T_IV_validity.txt 


* One regression of the instrument on each control separately (plus province dummies).
foreach v of varlist l_pcincome l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc { 
	quietly xi: reg l_cost_dist `v'  i.prov_code    [w=totpop]  , robust 
	outreg2 using T_IV_validity, excel stats(coef se) keep(`v') e(F) adec(3)  symbol(**,*) dec(3)  append 
}

* Joint regression on the full control set. The loop macro `v' no longer exists
* once the foreach above has finished, so keep() must list the controls explicitly.
quietly xi: reg l_cost_dist `regressors' i.prov_code    [w=totpop]  , robust 
outreg2 using T_IV_validity, excel stats(coef se) keep(`regressors') e(F) adec(3)  symbol(**,*) dec(3)  append 





***** Appendix Table 16. Gender Differences *****

cap erase T_IV_gender.xml
cap erase T_IV_gender.txt

* IV: each sex-specific outcome, first without controls and then with the full control set.
foreach y of var l_imr_male l_imr_female imr_male imr_female {
	foreach spec in no_ctrl ctrl_all {
		local ctrls ""
		if "`spec'" == "ctrl_all" local ctrls "`regressors'"

		quietly xi: ivreg2 `y' `ctrls' (sharetapwater=l_cost_dist) i.prov_code [w=totpop], robust
		outreg2 using T_IV_gender, excel keep(sharetapwater) e(rmse) ctitle(`y', `spec') dec(3) adec(3) symbol(**,*) append
	}
}

cap erase T_ols_gender.xml
cap erase T_ols_gender.txt

* OLS counterpart of the loop above.
foreach y of var l_imr_male l_imr_female imr_male imr_female {
	foreach spec in no_ctrl ctrl_all {
		local ctrls ""
		if "`spec'" == "ctrl_all" local ctrls "`regressors'"

		quietly xi: reg `y' sharetapwater `ctrls' i.prov_code [w=totpop], robust
		outreg2 using T_ols_gender, excel keep(sharetapwater) e(rmse) adec(3) ctitle(`y',`spec') symbol(**,*) dec(3) append
	}
}



*** Appendix Table A2. The Heterogeneous Effects of Piped Water Coverage on Infant Mortality Rate *** 

cap erase T_IV_nmr.xml
cap erase T_IV_nmr.txt

*** IV results: Neonatal & by gender
foreach y of var l_nmr l_nmr_male l_nmr_female  nmr nmr_male nmr_female {
	quietly xi: ivreg2 `y' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust 
	outreg2 using T_IV_nmr, excel keep(sharetapwater) e(rmse) ctitle(`y',no_ctrl) dec(3) adec(3) symbol(**,*) append 

	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust 
	outreg2 using T_IV_nmr, excel keep(sharetapwater)  e(rmse)  ctitle(`y',ctrl_all) dec(3) adec(3) symbol(**,*) append 
}

cap erase T_IV_post_nmr.xml
cap erase T_IV_post_nmr.txt

*** IV results: Post-neonatal & by gender
* Written to T_IV_post_nmr, the file erased just above. These rows were previously
* appended to T_IV_nmr, so the erase had no effect and the two sets of results
* ended up mixed in one file.
foreach y of var l_post_nmr l_post_nmr_male l_post_nmr_female post_nmr post_nmr_male post_nmr_female {
	quietly xi: ivreg2 `y' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust 
	outreg2 using T_IV_post_nmr, excel keep(sharetapwater) e(rmse) ctitle(`y',no_ctrl) dec(3) adec(3) symbol(**,*) append 

	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust 
	outreg2 using T_IV_post_nmr, excel keep(sharetapwater)  e(rmse)  ctitle(`y',ctrl_all) dec(3) adec(3) symbol(**,*) append 
}






*** Table 5. The Effects of Piped Water Coverage on Infant Mortality Rate: by Gender and by Cause-of Death ***

cap erase T_IV_cause.xml
cap erase T_IV_cause.txt

*** IV results: causes
* NOTE(review): l_mr_d_infant_tumor and l_mr_d_infant_tao are not generated in the visible
* part of this script; presumably they come with the input data -- confirm.
local cause_outcomes l_mr_d_infant_tumor l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao

foreach y of varlist `cause_outcomes' {
	* without controls, then with the full control set
	foreach spec in no_ctrl ctrl_all {
		local ctrls ""
		if "`spec'" == "ctrl_all" local ctrls "`regressors'"

		quietly xi: ivreg2 `y' `ctrls' (sharetapwater=l_cost_dist) i.prov_code [w=totpop], robust
		outreg2 using T_IV_cause, excel keep(sharetapwater) e(rmse) ctitle(`y', `spec') dec(3) adec(3) symbol(**,*) append
	}
}




*** Table 4, Panel B. The Effect of Piped Water Coverage on Infant Mortality Rate ***

cap erase T_IV_level.xml
cap erase T_IV_level.txt

* IV with the full control set, outcomes in levels rather than logs
local level_outcomes imr imr_male imr_female mr_d_infant_pneum mr_d_infant_loww mr_d_infant_tao

foreach y of varlist `level_outcomes' {
	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code [w=totpop], robust
	outreg2 using T_IV_level, excel keep(sharetapwater) e(rmse) ctitle(`y', ctrl_all_level) dec(3) adec(3) symbol(**,*) append
}




*** Table 6. The Effect of Least-cost Distance on Infant Mortality (Reduced Form) ***

cap erase T_reduced.xml
cap erase T_reduced.txt

* Reduced form: each outcome regressed directly on the instrument plus the full control set
local rf_outcomes l_imr l_imr_male l_imr_female l_nmr l_post_nmr l_mr_d_infant_pneum l_mr_d_infant_anorm l_mr_d_infant_loww l_mr_d_infant_tao

foreach y of varlist `rf_outcomes' {
	quietly xi: reg `y' `regressors' l_cost_dist i.prov_code [w=totpop], robust
	outreg2 using T_reduced, excel keep(l_cost_dist) e(rmse) ctitle(`y', low, ctrl_all) dec(3) adec(3) symbol(**,*) append
}


  


*** Appendix Table A16. The Effect of Piped Water Coverage on Infant Mortality Rate: Rural vs. Urban Areas ***

use sample.dta, clear

cap erase T_IV_rural.xml
cap erase T_IV_rural.txt

* rural2 is created before sample.dta is saved, so this capture'd gen normally does nothing.
cap gen rural2=ruralpop/totpop>0.5

* Province grouping used for the urban subsample fixed effects (some codes are pooled)
gen prov_urban = prov_code
recode prov_urban (11=13) (14=15) (41=42) (35=44)

local ru_outcomes l_imr l_imr_male l_imr_female l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao

* Rural subsample: flagged rural by either indicator; province fixed effects use prov_code
foreach y of varlist `ru_outcomes' {
	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code [w=totpop] if rural2==1 |Rural==1, robust
	outreg2 using T_IV_rural, excel keep(sharetapwater) e(rmse) ctitle(`y'_ctrl_all, rural2&Rural) dec(3) adec(3) symbol(**,*) append
}

* Urban subsample: both indicators equal 0; province fixed effects use prov_urban
foreach y of varlist `ru_outcomes' {
	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_urban [w=totpop] if rural2==0 & Rural==0, robust
	outreg2 using T_IV_rural, excel keep(sharetapwater) e(rmse) ctitle(`y'_ctrl_all, urban2&Urban) dec(3) adec(3) symbol(**,*) append
}




*** Table 8. The Effect of Piped Water Coverage on Infant Mortality Rate: Slightly Polluted vs. Severely Polluted Regions ***
*** Appendix Table A5. The Association between Piped Water Coverage and Least-cost Distance (First Stage Results for Slightly and Highly Polluted Locations) ***
*** Appendix Table A6. The Effect of Least-Cost Distance on Infant Mortality Rate: Slightly Polluted vs. Severely Polluted Regions (Reduced Form) ***

use sample.dta, clear

* Remove output from earlier runs; all three tables are built by appending.
foreach f in T_IV_pollution T_pollution_first T_pollution_reduced {
	cap erase `f'.xml
	cap erase `f'.txt
}

* Distribution of the water-pollution grade
hist waterquality, graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255)) ///
	xtitle(Water Pollution Grade) width(0.5) density kdensity color(ltblue) lcolor(white)
graph export pollutiondist.jpg, replace

* Median split of waterquality: group 1 = lower half, group 2 = upper half
xtile waterquality_group2 = waterquality, n(2)

* Province counts within each pollution group
forvalues g = 1/2 {
	tab prov_code if waterquality_group2==`g'
}

* NOTE(review): prov_highpol is created here but not referenced by any regression in the
* visible part of this script (they all use i.prov_code) -- confirm whether that is intended.
gen prov_highpol = prov_code
recode prov_highpol (43=42) (64=63)

* IV by pollution group (1 = low, 2 = high). Within each group every outcome is run
* first without controls and then with the full control set; rows are appended
* in that order.
forvalues g = 1/2 {
	local grp : word `g' of low high
	foreach spec in no_ctrl ctrl_all {
		local ctrls ""
		if "`spec'" == "ctrl_all" local ctrls "`regressors'"

		foreach y of var l_imr l_imr_male l_imr_female l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao {
			quietly xi: ivreg2 `y' `ctrls' (sharetapwater=l_cost_dist) i.prov_code [w=totpop] if waterquality_group2==`g', robust
			outreg2 using T_IV_pollution, excel keep(sharetapwater) e(rmse) ctitle(`y', `grp', `spec') dec(3) adec(3) symbol(**,*) append
		}
	}
}


* First stage by pollution group, estimated directly by OLS.
* These statements sit outside any foreach, so the macro `y' is empty here; the
* column titles name the dependent variable (sharetapwater) explicitly instead of
* producing a title that starts with a bare underscore.
quietly xi: reg sharetapwater l_cost_dist `regressors' i.prov_code    [w=totpop]   if waterquality_group2==1, robust 
outreg2 using T_pollution_first, excel keep(l_cost_dist)  e(rmse)  ctitle(sharetapwater_ctrl_all, low) dec(3) adec(3) symbol(**,*) append 

quietly xi: reg sharetapwater l_cost_dist `regressors' i.prov_code     [w=totpop]   if waterquality_group2==2, robust 
outreg2 using T_pollution_first, excel keep(l_cost_dist)  e(rmse)  ctitle(sharetapwater_ctrl_all, high) dec(3) adec(3) symbol(**,*) append 


* Same first stage taken from ivreg2 (savefirst) with the province dummies partialled out;
* the stored first-stage estimates are restored and replayed before export.
xi: ivreg2 l_imr `regressors' i.prov_code  (sharetapwater=l_cost_dist)  [w=totpop]   if waterquality_group2==1, robust first savefirst partial(i.prov_code) 
est restore _ivreg2_sharetapwater   
ivreg2
outreg2 using T_pollution_first, excel keep(l_cost_dist) e(rmse)  ctitle(sharetapwater_ctrl_all, low) dec(3) adec(3) symbol(**,*) append 

xi: ivreg2 l_imr `regressors' i.prov_code  (sharetapwater=l_cost_dist)  [w=totpop]   if waterquality_group2==2, robust first savefirst partial(i.prov_code) 
est restore _ivreg2_sharetapwater   
ivreg2 
outreg2 using T_pollution_first, excel keep(l_cost_dist) e(rmse)  ctitle(sharetapwater_ctrl_all, high) dec(3) adec(3) symbol(**,*) append 

* Reduced form by pollution group (1 = low, 2 = high), full control set
forvalues g = 1/2 {
	local grp : word `g' of low high
	foreach y of var l_imr l_imr_male l_imr_female l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao {
		quietly xi: reg `y' `regressors' l_cost_dist i.prov_code [w=totpop] if waterquality_group2==`g', robust
		outreg2 using T_pollution_reduced, excel keep(l_cost_dist) e(rmse) ctitle(`y', `grp') dec(3) adec(3) symbol(**,*) append
	}
}


**** Appendix Table A7: Exclude minority counties ****

use sample.dta, clear

cap erase T_IV_Han.xml
cap erase T_IV_Han.txt

* Keep locations with minority_county equal to 0 or a minority share below 0.5
local han_outcomes l_imr l_imr_male l_imr_female l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao

foreach y of varlist `han_outcomes' {
	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code [w=totpop] if minority_county==0 | minority<0.5, robust
	outreg2 using T_IV_Han, excel keep(sharetapwater) e(rmse) ctitle(`y', def1, ctrl_all) dec(3) adec(3) symbol(**,*) append
}


*** Appendix Table A8. Robustness Check: Excluding Mountain Indicator from the Controls and Mountainous Locations ***

use sample.dta, clear 

* Both output files are built with append, so both must be cleared first.
* T_IV_nomountain was previously never erased and accumulated rows across runs.
cap erase T_IV_Mountain.xml 
cap erase T_IV_Mountain.txt 
cap erase T_IV_nomountain.xml 
cap erase T_IV_nomountain.txt 

local nomountain "l_pcincome lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2"   

* (a) Full sample, control set without the mountain indicator
foreach y of var l_imr l_imr_male l_imr_female l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao  {	 
	quietly xi: ivreg2 `y' `nomountain' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]   , robust 
	outreg2 using T_IV_nomountain, excel keep(sharetapwater)  e(rmse)  ctitle(`y', def1, ctrl_all) dec(3) adec(3) symbol(**,*) append 
} 
 
* (b) Drop locations flagged as mountain, then use the full control set
drop if mountain==1     

foreach y of var l_imr l_imr_male l_imr_female l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao  {	 
	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust 
	outreg2 using T_IV_Mountain, excel keep(sharetapwater)  e(rmse)  ctitle(`y', def1, ctrl_all) dec(3) adec(3) symbol(**,*) append 
} 



*** Table 7. The Effect of Piped Water Coverage on Under-5 Child Mortality Rate ***

use sample.dta, clear  

* The results below are appended to T_IV_Child, so that exact name (same letter
* case) is what must be erased. On a case-sensitive filesystem erasing
* "T_IV_child" leaves the old file in place.
cap erase T_IV_Child.xml 
cap erase T_IV_Child.txt 

* Denominator: m1 + f1
gen total15=m1+f1  
* Combined tumor + anorm + other deaths
gen d15_child_tao = d15_child_tumor + d15_child_anorm + d15_child_other 

* Rates per 1,000 of total15, in levels and as log(1+rate)
foreach v in  d15_child d15_child_pneum d15_child_loww d15_child_tao { 
	gen mr_`v'=`v'*1000/total15 	
	gen l_mr_`v'=log(1+`v'*1000/total15)  	
}

* Sex-specific rates use the sex-specific denominators m1 and f1
gen mr_d15_child_male   = d15_child_male*1000  /m1  
gen mr_d15_child_female = d15_child_female*1000/f1  
gen l_mr_d15_child_male   = log(1+d15_child_male*1000  /m1)   
gen l_mr_d15_child_female = log(1+d15_child_female*1000/f1)    


foreach y of var l_mr_d15_child l_mr_d15_child_male l_mr_d15_child_female l_mr_d15_child_pneum l_mr_d15_child_loww l_mr_d15_child_tao mr_d15_child mr_d15_child_male mr_d15_child_female mr_d15_child_pneum mr_d15_child_loww mr_d15_child_tao {
	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust 
	outreg2 using T_IV_Child, excel keep(sharetapwater)  e(rmse)  ctitle(`y', ctrl_all) dec(3) adec(3) symbol(**,*) append 
} 


*** Table A4. The Effect of Least-Cost Distance on Under-5 Mortality Rate (Reduced Form) ***

use sample.dta, clear  

cap erase T_reduced_under5.xml 
cap erase T_reduced_under5.txt 

* Reloading sample.dta discards any derived variables held in memory, so the
* under-5 mortality rates used below are built here. d15_child_anorm is included
* because the outcome list needs its rate in levels and logs.
gen total15=m1+f1  
gen d15_child_tao = d15_child_tumor + d15_child_anorm + d15_child_other 

* Rates per 1,000 of total15, in levels and as log(1+rate)
foreach v in d15_child d15_child_pneum d15_child_anorm d15_child_loww d15_child_tao { 
	gen mr_`v'=`v'*1000/total15 
	gen l_mr_`v'=log(1+`v'*1000/total15) 
}

* Sex-specific rates use the sex-specific denominators m1 and f1
gen mr_d15_child_male   = d15_child_male*1000  /m1  
gen mr_d15_child_female = d15_child_female*1000/f1  
gen l_mr_d15_child_male   = log(1+d15_child_male*1000  /m1)   
gen l_mr_d15_child_female = log(1+d15_child_female*1000/f1)    

foreach y of var l_mr_d15_child l_mr_d15_child_male l_mr_d15_child_female l_mr_d15_child_pneum l_mr_d15_child_anorm l_mr_d15_child_loww l_mr_d15_child_tao mr_d15_child mr_d15_child_male mr_d15_child_female mr_d15_child_pneum mr_d15_child_anorm mr_d15_child_loww mr_d15_child_tao {
	quietly xi: reg `y' `regressors' l_cost_dist i.prov_code    [w=totpop]   , robust 
	outreg2 using T_reduced_under5, excel keep(l_cost_dist)  e(rmse)  ctitle(`y', low, ctrl_all) dec(3) adec(3) symbol(**,*) append 
} 


*** Appendix Table A7, Panel A: Excluding High Migration Areas ***

cap erase T_IV_migration.xml
cap erase T_IV_migration.txt

* Runs on the data currently in memory (no dataset is loaded in this section).
* Only locations with mig_pct4 below 0.20 enter the estimation.
local mig_outcomes l_imr l_imr_male l_imr_female l_mr_d_infant_pneum l_mr_d_infant_loww l_mr_d_infant_tao

foreach y of varlist `mig_outcomes' {
	quietly xi: ivreg2 `y' `regressors' (sharetapwater=l_cost_dist) i.prov_code [w=totpop] if mig_pct4<0.20, robust
	outreg2 using T_IV_migration, excel keep(sharetapwater) e(rmse) ctitle(`y'_ctrl_all, mig4) dec(3) adec(3) symbol(**,*) append
}
 


*** Appendix Table A9. IV results of Dropping Locations Ranked Top and Bottom X% of both Economic Activities and Population Density *** 
*** Appendix Table A10. IV results of Dropping Locations with Top X% of Economic Activities or Population Density ***  

use sample.dta, clear 

* Align a few location codes with the codes used in quxian_2010 before merging.
recode code 340124=341421  
recode code 419001=411801
recode code 469036=469030

* Quoted, forward-slash path: portable across platforms and safe if the
* directory name ever contains spaces.
merge 1:1 code using "$dir/dofiles/JAERErevision/quxian_2010.dta" 
list code if _merge==1 

* City = code without its last two digits. City totals are computed while the
* using-only rows are still in memory, so they include every record of the city
* found in quxian_2010, not just the sampled locations.
gen city=int(code/100) 
bysort city: egen citypop=sum(population) 
bysort city: egen cityarea=sum(area) 
drop if _merge==2 
drop _merge 

gen popden=population/area
gen citypopden = citypop/cityarea 

gen l_popden=log(popden)
gen l_citypopden=log(citypopden)

* Control sets (re-declared here so this section can be run on its own)
local regressor1 "l_pcincome "   
local regressor2 "l_pcincome lowwaterquality"   
local regressor4 "l_pcincome l_fisexp_pc l_fisexp_h_pc "   
local regressor5 "l_pcincome l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc "   
local regressor3 "l_pcincome sharemanu rural2 mountain"   
local regressors "l_pcincome lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2 mountain "   


foreach f in T_IVA9 T_IVA10 {
	cap erase `f'.xml
	cap erase `f'.txt
}

* Twenty quantile groups of log per-capita income (y) and log population density (z)
xtile y = l_pcincome, n(20)
xtile z = l_popden, n(20)

* Table A10: drop the top groups of one ranking at a time (income first, then density),
* with cutoffs 20, 19, 18, 17 in turn. The data are restored after each run.
foreach rank in y z {
	foreach cut in 20 19 18 17 {
		preserve
		drop if `rank' >= `cut'

		quietly xi: ivreg2 l_imr `regressors' (sharetapwater=l_cost_dist) i.prov_code [w=totpop], robust
		outreg2 using T_IVA10, excel keep(sharetapwater) e(rmse) ctitle(ctrl_all) dec(3) adec(3) symbol(**,*) append

		restore
	}
}

* Fold the top groups onto the bottom ones (20->1, 19->2, 18->3, 17->4), so that
* "<= cut" selects both tails of each ranking at once.
foreach rank in y z {
	recode `rank' (20=1) (19=2) (18=3) (17=4)
}

* Table A9: drop locations that fall in the selected tails of BOTH rankings
forvalues cut = 1/4 {
	preserve
	drop if z<=`cut' & y<=`cut'

	quietly xi: ivreg2 l_imr `regressors' (sharetapwater=l_cost_dist) i.prov_code [w=totpop], partial(i.prov_code) robust
	outreg2 using T_IVA9, excel keep(sharetapwater) e(rmse) ctitle(income&popden) dec(3) adec(3) symbol(**,*) append

	restore
}



*** Appendix Table A14. The Relationship between the Least Cost Distance and the NMCHMS-to-River Distance ***
*** Appendix Table A15. IV Results with Extra County-River Distance Controls ***

use sample.dta, clear

* Start the IV table from an empty file (outreg2 below always appends).
foreach ext in xml txt {
	cap erase T_IV_riverdist.`ext'
}

* Fix: -merge- aborts with "variable _merge already defined" when the master
* data still carries a _merge variable. A later section of this file drops
* _merge right after loading sample.dta, which suggests the file has one.
* -capture- makes this a no-op when it does not.
cap drop _merge
merge 1:1 code using "$path\county_centroid2river.dta"
drop if _merge==2

* Distance-to-river measures brought in by the merge.
* NOTE(review): the suffixes (1, 23, 4, 5) presumably index river classes - confirm.
local distvars "min_dist1 min_dist23 min_dist4 min_dist5"

*** correlation between lcd variable and the closest distance from the county center to river
foreach dist of local distvars {
	corr `dist' l_cost_dist
}

*** regressing the closest distance from the county center to river on lcd variable
* The first regression replaces corr.*, the following ones append to it.
local mode "replace"
foreach dist of local distvars {
	reg `dist' l_cost_dist
	outreg2 using corr, excel dec(3) symbol(**,*) `mode'
	local mode "append"
}

*** IV estimates adding one distance control at a time ...
foreach dist of local distvars {
	quietly xi: ivreg2 l_imr `regressors' `dist' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust
	outreg2 using T_IV_riverdist, excel keep(sharetapwater)  e(rmse)  ctitle(add class dist) dec(3) adec(3) symbol(**,*) append
}

*** ... and all distance controls (every variable matching min_dist*) together
quietly xi: ivreg2 l_imr `regressors' min_dist* (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust
outreg2 using T_IV_riverdist, excel keep(sharetapwater)  e(rmse)  ctitle(add class dist) dec(3) adec(3) symbol(**,*) append
                                                                                                                                                    
*** Appendix Table A11. IV Results Using Different Measures of Economic Activities ***

* --- Build a per-capita GDP measure, one row per code ---
* The source variable names translate as:
*   地区生产总值_万元   regional GDP (10,000 yuan)
*   人均生产总值_万元   GDP per capita
*   年末常住人口数_万人 year-end resident population (10,000 persons)
*   年末户籍人口数_万人 year-end registered population (10,000 persons)
tempfile f1
use "$path\gdppc.dta", clear      // path quoted so that a $path containing spaces still works
collapse (mean) 地区生产总值_万元 人均生产总值_万元 年末常住人口数_万人 年末户籍人口数_万人, by(code)

* Use the reported per-capita figure; where missing, fall back to GDP divided by
* registered population, then by resident population.
* NOTE(review): confirm that the reported figure and the computed ratios
* (and the GDP/totpop fallback further down) are in the same units.
gen gdppc=人均生产总值_万元
replace gdppc=地区生产总值_万元/年末户籍人口数_万人 if gdppc==.
replace gdppc=地区生产总值_万元/年末常住人口数_万人 if gdppc==.
save `f1', replace

* --- Attach it to the estimation sample ---
use sample.dta, clear
* Fix: a bare -drop _merge- aborts when sample.dta has no _merge variable;
* the section above loads the same file and merges without dropping it first.
* -capture- makes this step safe either way.
cap drop _merge
merge 1:1 code using `f1'
drop if _merge==2
drop _merge

* Control sets that differ only in the income / economic-activity measure.
local regressorsinc1 "lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2 mountain "
local regressors  "l_pcincome lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2 mountain "
local regressorsinc3 "l_gdppc lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2 mountain "
local regressorsinc4 "l_citylight lowwaterquality l_fisexp_pc l_fisexp_h_pc l_fisexp_inf_pc l_inst_inf_pc l_staff_inf_pc sharemanu rural2 mountain "

cap erase T_IV_income.xml
cap erase T_IV_income.txt

* Last fallback for per-capita GDP, then the log(1+x) transform.
replace gdppc=地区生产总值_万元/totpop if gdppc==.
gen l_gdppc = log(gdppc+1)

	xi: ivreg2 l_imr `regressorsinc1' (sharetapwater=l_cost_dist ) i.prov_code    [w=totpop]  , first robust             // no income
	outreg2 using T_IV_income, excel keep(sharetapwater)  e(rmse)  ctitle(ctrl_soec_med) dec(3) adec(3) symbol(**,*) append

	xi: ivreg2 l_imr `regressors' (sharetapwater=l_cost_dist ) i.prov_code    [w=totpop]  , first robust              // add l_pcincome
	outreg2 using T_IV_income, excel keep(sharetapwater)  e(rmse)  ctitle(ctrl_soec_med) dec(3) adec(3) symbol(**,*) append

	xi: ivreg2 l_imr `regressorsinc3' (sharetapwater=l_cost_dist ) i.prov_code    [w=totpop]  , first robust              // add l_gdppc
	outreg2 using T_IV_income, excel keep(sharetapwater)  e(rmse)  ctitle(ctrl_soec_med) dec(3) adec(3) symbol(**,*) append

	* Fix: l_citylight is already part of `regressorsinc4'; it used to be listed a
	* second time on this line, which only produced a duplicate-variable warning.
	xi: ivreg2 l_imr `regressorsinc4' (sharetapwater=l_cost_dist ) i.prov_code    [w=totpop]  , first robust   // add light image data
	outreg2 using T_IV_income, excel keep(sharetapwater)  e(rmse)  ctitle(ctrl_soec_med) dec(3) adec(3) symbol(**,*) append
                                                                                                                                                    

*** Appendix Table A13. IV Results Excluding Cities Involved in South-to-North Water Diversion Project ***
* Eastern route passes through: Huai'an, Suqian, Xuzhou, Zaozhuang, Jining, Liaocheng, Jinan, Zibo, Cangzhou, Hengshui
* Middle route passes through:  Nanyang, Pingdingshan, Xuchang, Zhengzhou, Jiaozuo, Xinxiang, Hebi, Anyang, Handan, Xingtai, Shijiazhuang, Baoding, Tianjin, Beijing

use sample.dta, clear

* Fix: this section appends to T_IV_waterproject, so that is the output that has
* to be cleared. The earlier version erased T_IV_income.* instead, which deleted
* the Table A11 output written just before and let T_IV_waterproject accumulate
* columns across runs.
cap erase T_IV_waterproject.xml
cap erase T_IV_waterproject.txt

* Drop every observation whose citycode is in the list of project cities.
* NOTE(review): the comment above names Beijing, but no code beginning with 11
* appears in this list - confirm whether Beijing should be excluded as well.
foreach v in 3208 3213 3203 3704 3708 3715 3701 3703 1309 1311 4113 4104 4110 4101 4108 4107 4106 4105 1301 1304 1305 1306 1201 1202 {
	drop if citycode == `v'
}

* Baseline IV specification on the remaining sample.
quietly xi: ivreg2 l_imr `regressors' (sharetapwater=l_cost_dist) i.prov_code    [w=totpop]  , robust
outreg2 using T_IV_waterproject, excel keep(sharetapwater)  e(rmse)  ctitle(南水北调) dec(3) adec(3) symbol(**,*) append




***** Table 1. Summary Stats ***** 

* NOTE(review): no data set is loaded here, so these statistics describe whatever
* the preceding section left in memory (sample.dta with the South-to-North Water
* Diversion cities dropped). Confirm that this subsample, rather than the full
* sample.dta, is what Table 1 is meant to summarise.

* Rescale for presentation.
replace sharetapwater=sharetapwater/10 
replace pcincome=pcincome/1000 
replace fisexp_pc=fisexp_pc/1000 
replace fisexp_h_pc=fisexp_h_pc/1000  
replace fisexp_inf_pc=fisexp_inf_pc/1000   

* IMR split by rural / urban status.
* Fix: the two conditions were swapped. imr_rural was set to missing exactly where
* rural2==1 | Rural==1, so it held the non-rural values, and imr_urban was set to
* missing where rural2==0 & Rural==0. Now imr_rural is blanked for observations
* that both indicators mark as 0, and imr_urban for observations that either
* indicator marks as 1.
* NOTE(review): this assumes the value 1 of rural2 / Rural means "rural" - confirm.
gen imr_rural=imr 
replace imr_rural=. if rural2==0 & Rural==0
gen imr_urban=imr 
replace imr_urban=. if rural2==1 | Rural==1
sum imr* 


cap erase sumimr.xml 
cap erase sumimr.txt 

* The regressions below are run only so that outreg2 has an estimation result to
* attach to; the -sum- option makes it write means and standard deviations.
reg imr imr_male imr_female nmr post_nmr mr_d_infant_pneum mr_d_infant_anorm mr_d_infant_loww mr_d_infant_tao  Cost sharetapwater lowwaterquality pcincome fisexp_pc fisexp_h_pc fisexp_inf_pc inst_inf_pc staff_inf_pc  sharemanu rural2 mountain 
outreg2 using sumimr ,             sum excel  stats(mean sd) append 
reg imr imr_male
outreg2 using sumimr if rural2==1, sum excel  stats(mean sd) append   
reg imr imr_male   
outreg2 using sumimr if rural2==0, sum excel  stats(mean sd) append     

*outreg2 using sumimr if sharetapwater>5, sum excel  stats(mean sd)  symbol(**,*) dec(3)  append 
*outreg2 using sumimr if sharetapwater<=5, sum excel  stats(mean sd)  symbol(**,*) dec(3)  append 

*univar imr imr_male imr_female nmr post_nmr mr_d_infant_tumor mr_d_infant_pneum mr_d_infant_loww mr_d_infant_other mr_d_infant_ta Cost sharetapwater  sharemanu   pcincome fisexp_pc fisexp_h_pc inst_inf_pc staff_inf_pc  

* Quartiles (p25 / p50 / p75) of every summary variable, exported to tabstat.xlsx.
tabstat imr imr_male imr_female imr_rural imr_urban nmr post_nmr mr_d_infant_pneum mr_d_infant_loww mr_d_infant_tao Cost sharetapwater lowwaterquality pcincome fisexp_pc fisexp_h_pc fisexp_inf_pc inst_inf_pc staff_inf_pc  sharemanu rural2 mountain , stat(p25 p50 p75) columns(statistics) format(%10.2f) save 
* Copy the saved results into a named matrix straight away, so the export does not
* rely on r() still being intact after -putexcel set- has run. The transpose puts
* variables in rows and statistics in columns.
matrix T1_stats = r(StatTotal)'
putexcel set tabstat.xlsx, replace 
putexcel A1 = matrix(T1_stats), names      







